notebook.community

Edit and run



In [ ]:

    
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
# Global plotting defaults for every figure drawn in this notebook:
# the 'fivethirtyeight' style sheet and a 16x12 inch default figure size.
plt.style.use('fivethirtyeight')
plt.rcParams.update({'figure.figsize': (16, 12)})



In [ ]:

    
# Load the whitespace-delimited emotion-score table, using its first column as
# the index. The separator is written as a raw string: in a normal string
# literal '\s' is an invalid escape sequence that Python warns about.
emotion_score = pd.read_csv('Tokyo_score_raw.txt', index_col=0, sep=r'\s+')
# Convert the index to datetime64[ns] so the frame has a DatetimeIndex
# (needed for time-based operations such as resample).
emotion_score.index = emotion_score.index.astype('datetime64[ns]')
emotion_score.tail()



In [ ]:

    
# Load the whitespace-delimited N225 table, using its first column as the
# index. The separator is written as a raw string: in a normal string literal
# '\s' is an invalid escape sequence that Python warns about.
stock = pd.read_csv('N225_data.txt', sep=r'\s+', index_col=0)
# Convert the index to datetime64[ns] so the frame has a DatetimeIndex
# (needed for time-based operations such as resample).
stock.index = stock.index.astype('datetime64[ns]')
stock.tail()

数据读入完毕，开始按照一定的时间段进行汇总并计算相关性

按照月来进行汇总并计算相关性（之前考虑过照片数量过少可能导致拟合出现问题，现在先不考虑这种情况）。每月的样本数量可以这样统计：`count = emotion_all['emotion_score'].resample('MS').count()`



In [ ]:

    
# Average the emotion score within each month-start ('MS') bin.
gp1 = emotion_score['emotion_score'].resample('MS').mean()
miu1 = gp1.mean()
std1 = gp1.std()
# Standardise (z-score) the monthly means. Author's note: normalisation is done
# after aggregation on purpose; without aggregating first, the normalised curve
# just fluctuates tightly along the axis.
gp11 = gp1.sub(miu1).div(std1)
gp11.tail()



In [ ]:

    
# Average the 'Close' column within each month-start ('MS') bin.
gp2 = stock['Close'].resample('MS').mean()
miu2 = gp2.mean()
std2 = gp2.std()
# Standardise (z-score) the monthly means: subtract their mean, divide by
# their standard deviation.
gp22 = gp2.sub(miu2).div(std2)
gp22.tail()



In [ ]:

    
# Put the two standardised series side by side as columns; the inner join
# keeps only index labels present in both. Note the emotion series is negated
# here, so both the plot and the printed coefficient refer to the sign-flipped
# emotion score versus 'Close'.
df = pd.concat([-gp11, gp22], axis=1, join='inner')
df.plot()
# Look up the single off-diagonal entry of the 2x2 correlation matrix.
print("相关系数 为 {}".format(df.corr().loc['Close', 'emotion_score']))